[1]:
import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
plt.rcParams['figure.constrained_layout.use'] = True

import dask
print(f'dask: {dask.__version__}')
import dask.array as da
dask.config.set({'array.chunk-size': '512MiB'})  # target chunk size used for 'auto' rechunking

import xarray as xr
dask: 2.11.0
[ ]:
import sys
print(sys.executable)
[2]:
from psutil import virtual_memory
import gc
# gc.collect() # run garbage collection to free up memory if needed

mem = virtual_memory()
print(f'Physical memory: {mem.total/1024/1024/1024:.0f} GiB')  # total physical memory available
Physical memory: 504 GiB
[3]:
import logging
logging.basicConfig(filename='example.log', level=logging.DEBUG)
[4]:
%load_ext autoreload

%autoreload 2

import toolbox_scs as tb
print(tb.__file__)
from toolbox_scs.routines.boz import load_dssc_module

from extra_data import open_run
/home/lleguy/notebooks/ToolBox/src/toolbox_scs/__init__.py

Parameters

[ ]:
proposalNB = 2719
dark_runNB = 180
runNB = 179
module_group = 0
pulse_pattern = ['pumped', 'unpumped']
xaxis = 'delay'  # or 'nrj'
bin_width = 0.1 # [ps]
path = f'/gpfs/exfel/exp/SCS/202002/p002719/scratch/tests/r{runNB}/'
[ ]:
moduleNB = list(range(module_group*4, (module_group+1)*4))  # the 4 DSSC modules in the selected group
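
The path defined in the parameters is used as the output directory for the per-module files. A minimal sketch, assuming write access to the scratch folder, to create it if it is missing:

[ ]:
import os
# create the output directory for the per-module results if it does not exist yet
os.makedirs(path, exist_ok=True)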

Processing function

[44]:
def process(module):
    # Load dark
    arr_dark, tid_dark = load_dssc_module(proposalNB, dark_runNB, module, drop_intra_darks=False)
    arr_dark = arr_dark.rechunk(('auto', -1, -1, -1))
    dark_img = arr_dark.mean(axis=0).compute()

    # Load module data
    arr, tid = load_dssc_module(proposalNB, runNB, module, drop_intra_darks=False)
    arr = arr.rechunk(('auto', -1, -1, -1))

    # dark and intra-dark correction
    arr = arr - dark_img
    arr = arr[:, ::2, :, :] - arr[:, 1::2, :, :]  # subtract intra-dark frames from the data frames

    # Load slow data against which to bin
    if xaxis == 'delay':
        run, v = tb.load(proposalNB, runNB, ['PP800_DelayLine', 'BAM1932M', 'SCS_XGM'])
    else:
        run, v = tb.load(proposalNB, runNB, [xaxis, 'SCS_XGM'])

    # select part of the run
    # v = v.isel({'trainId':slice(0,3000)})

    # combine slow and DSSC module data
    xr_data = xr.DataArray(arr,
                       coords={'trainId': tid,
                               'sa3_pId': v['sa3_pId'].values},
                       dims = ['trainId', 'sa3_pId', 'y', 'x'])
    xr_data = xr_data.expand_dims(module=[module], axis=2)
    r = xr.merge([xr_data.to_dataset(name='DSSC'), v], join='inner')

    # calculate bins
    if xaxis == 'delay':
        r['delay'] = tb.misc.positionToDelay(r['PP800_DelayLine'])
        bam = r['BAM1932M'] - r['BAM1932M'].mean()
        r['bin_delay'] = ((r['delay'] - bam)/bin_width).round()*bin_width
    else:
        r['bin_' + xaxis] = (r[xaxis]/bin_width).round()*bin_width

    # add the pulse pattern coordinates
    Nrepeats = len(v['sa3_pId'].values)//len(pulse_pattern)
    pp = pulse_pattern*Nrepeats
    pp = np.array(pp)
    r = r.assign_coords(pp=("sa3_pId", pp))

    # select pattern and bin data
    bin_data = None
    for p in np.unique(pp):
        # slice using non-index coordinates
        # https://github.com/pydata/xarray/issues/2028
        sub_r = r.sel(sa3_pId=(r.pp == p))

        # calculate mean on bin, then mean to remove the dimension
        res = sub_r.groupby('bin_'+xaxis).mean().mean(['sa3_pId'])

        if bin_data is None:
            bin_data = res
            bin_data['DSSC'] = res['DSSC'].expand_dims(pp=[p])
            bin_data['SCS_SA3'] = res['SCS_SA3'].expand_dims(pp=[p])
        else:
            bin_data = xr.merge([bin_data,
                                 res['DSSC'].expand_dims(pp=[p]),
                                 res['SCS_SA3'].expand_dims(pp=[p])])

    # save the result
    fname = path + f'run{runNB}-darkrun{dark_runNB}-module{module}.h5'
    print(fname)
    bin_data.to_netcdf(fname, format='NETCDF4', engine='h5netcdf')
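
The binning step above rounds the slow variable to the nearest multiple of bin_width and then averages with an xarray groupby. A minimal, self-contained sketch of that pattern on synthetic data (the names and values here are illustrative only):

[ ]:
# illustration of the round-to-bin followed by groupby().mean() pattern used in process()
demo = xr.Dataset({'signal': ('trainId', np.random.rand(100))},
                  coords={'trainId': np.arange(100),
                          'delay': ('trainId', np.linspace(-1, 1, 100))})
demo['bin_delay'] = (demo['delay']/bin_width).round()*bin_width
binned = demo.groupby('bin_delay').mean()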

Processing

[ ]:
for m in moduleNB:
    process(m)
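
Once all modules in the group have been processed, the per-module files can be read back and combined. A minimal sketch, assuming the same path and naming scheme as in process() and that all files were written successfully (variables without a module dimension are simply repeated along it):

[ ]:
# re-open the per-module results and concatenate them along the 'module' dimension
modules_data = [xr.open_dataset(path + f'run{runNB}-darkrun{dark_runNB}-module{m}.h5',
                                engine='h5netcdf')
                for m in moduleNB]
data = xr.concat(modules_data, dim='module')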